* ---------------------------------------------------------------------------
* Session setup for the education/employment IV estimates.
*
* Expects the globals ${us_results} (output folder) and ${projdata} (input
* data folder) to be defined by the caller before this file is run; neither
* is set in this file.
* ---------------------------------------------------------------------------
clear
set more off
capture log close
clear matrix


  *child ages to observe their adult incomes
  gl agemin=25 // youngest possible=25
  gl agemax=48 // oldest possible=48
  
  *child birth cohorts to use
  * NOTE(review): cohortmin/cohortmax are not referenced anywhere else in this
  * file - confirm whether a cohort restriction is missing or these can go.
  loc cohortmin=1952 // earliest possible=1952
  loc cohortmax=1993 // latest possible=1993
  
  
* Path is quoted so that an output folder containing spaces still works
log using "${us_results}/US_irps_eduempIV.log", replace


  
*USE the ranked MAIN SAMPLE
* Only the listed variables and observations from 1985 onwards are loaded.
* The path is quoted so that a data folder containing spaces still works.
use *newid *LAB* AGE *cohort *LABYR* *MF* *empl* *schmax year female if year>=1985 using "${projdata}/analysis-sample-main.dta", clear

* Child age: AGEC is the raw age (used for the age-range filter later on),
* AGEC1 is year minus cohort minus 40 and feeds the polynomial terms.
gen AGEC=AGE
gen AGEC1=year-cohort-40

* Parent-specific linear term: f_LABYR/m_LABYR minus the parent's cohort
foreach p in f m {
	gen `p'_LABAGE_1=`p'_LABYR-`p'_cohort
}

* Powers 2 to 4 of the child and parent terms
forval i=2/4 {
	gen AGEC`i'=AGEC1^`i'
	foreach p in f m {
		gen `p'_LABAGE_`i'=`p'_LABAGE_1^`i'
	}
}
   

  * MOTHER's and FATHER's SAMPLE
  * The restrictions are applied one after another to the same data in memory,
  * so the observations that remain have non-missing values for BOTH parents
  * (prefix m = mother, f = father) as well as for the child's schmax.
  foreach p in m f {
	keep if p`p'_LAB!=. & `p'_LABAGE_1!=.
	keep if `p'_emplavg!=. & `p'_schmax!=.
  }
  keep if schmax!=.
 

* RESIDUALIZE all measures
* Every variable in the list is overwritten with its residual from a
* regression on the mother, father and child polynomial terms and year
* dummies. The ? wildcard matches exactly one character, so AGEC? picks up
* AGEC1-AGEC4 but not the raw AGEC.
 local controls m_LABAGE_? f_LABAGE_? AGEC? i.year
 foreach y in pLAB employ schmax pm_LAB  m_schmax pf_LAB  f_schmax {
	tempvar ehat
	quietly regress `y' `controls'
	predict `ehat', resid
	* swap the original variable for its residual, keeping the original name
	drop `y'
	rename `ehat' `y'
 }


************************
* MOTHERS and FATHERS, early and late period
*
* For each parent (m = mother, f = father) and each period, four estimates
* are appended to the row vectors irp/se (unscaled predictor) and sirp/sse
* (predictor rescaled to the standard deviation of the parent's p?_LAB):
*   1. pLAB on the parent's own p?_LAB (identical entry in both sets)
*   2. pLAB on the schooling-based prediction
*   3. pLAB on the schooling-and-employment-based prediction
*   4. as 3, additionally controlling for the spouse's p?_LAB and schmax
* Resulting column order: mothers early (1-4), mothers late (5-8),
* fathers early (9-12), fathers late (13-16).
*
* The first stage is always estimated on fathers (pf_LAB on f_ variables);
* its coefficients are then applied to the respective parent's variables.
*
* Changes relative to the former four copy-pasted sections:
*   - every second-stage regression now uses cluster(newid); the fathers'
*     specifications 3 and 4 previously reported unclustered standard errors
*   - the first stage of specifications 3 and 4 now contains the
*     schooling-by-employment interaction, matching the term that the
*     prediction multiplies the coefficient with (it used to contain
*     schooling squared while the prediction used schooling times employment)

* start from empty result vectors so that nullmat() can grow them below
foreach res in irp se sirp sse {
	capture matrix drop `res'
}

foreach par in m f {

	* prefix of the spouse, used for the conditional specification
	local sp = cond("`par'"=="m", "f", "m")

	foreach window in "1985 1995" "2008 2019" {

		local yrmin : word 1 of `window'
		local yrmax : word 2 of `window'

		preserve

		* Subsample
		keep if inrange(year,`yrmin',`yrmax') & inrange(AGEC,${agemin},${agemax})

		* 1. Baseline regression on the parent's own measure
		reg pLAB p`par'_LAB, cluster(newid)
		matrix irp  = nullmat(irp),  _b[p`par'_LAB]
		matrix se   = nullmat(se),   _se[p`par'_LAB]
		matrix sirp = nullmat(sirp), _b[p`par'_LAB]
		matrix sse  = nullmat(sse),  _se[p`par'_LAB]

		* 2. IV for latent skill, only education
		reg pf_LAB f_schmax
		gen pr_p`par'_LAB = `par'_schmax*_b[f_schmax]

		reg pLAB pr_p`par'_LAB, cluster(newid)
		matrix irp = irp, _b[pr_p`par'_LAB]
		matrix se  = se,  _se[pr_p`par'_LAB]

		* rescale the prediction to the standard deviation of p?_LAB
		sum pr_p`par'_LAB
		scalar sd_1 = r(sd)
		sum p`par'_LAB
		scalar sd_2 = r(sd)
		gen spr_p`par'_LAB = pr_p`par'_LAB * sd_2/sd_1

		reg pLAB spr_p`par'_LAB, cluster(newid)
		matrix sirp = sirp, _b[spr_p`par'_LAB]
		matrix sse  = sse,  _se[spr_p`par'_LAB]

		* 3. IV, education and employment
		reg pf_LAB f_schmax f_emplavg c.f_schmax#c.f_emplavg
		gen pr2_p`par'_LAB = `par'_schmax*_b[f_schmax] + `par'_emplavg*_b[f_emplavg] + `par'_schmax*`par'_emplavg*_b[c.f_schmax#c.f_emplavg]

		reg pLAB pr2_p`par'_LAB, cluster(newid)
		matrix irp = irp, _b[pr2_p`par'_LAB]
		matrix se  = se,  _se[pr2_p`par'_LAB]

		sum pr2_p`par'_LAB
		scalar sd_1 = r(sd)
		sum p`par'_LAB
		scalar sd_2 = r(sd)
		gen spr2_p`par'_LAB = pr2_p`par'_LAB * sd_2/sd_1

		reg pLAB spr2_p`par'_LAB, cluster(newid)
		matrix sirp = sirp, _b[spr2_p`par'_LAB]
		matrix sse  = sse,  _se[spr2_p`par'_LAB]

		* 4. IV, education and employment, conditional on spouse
		reg pLAB pr2_p`par'_LAB p`sp'_LAB `sp'_schmax, cluster(newid)
		matrix irp = irp, _b[pr2_p`par'_LAB]
		matrix se  = se,  _se[pr2_p`par'_LAB]

		reg pLAB spr2_p`par'_LAB p`sp'_LAB `sp'_schmax, cluster(newid)
		matrix sirp = sirp, _b[spr2_p`par'_LAB]
		matrix sse  = sse,  _se[spr2_p`par'_LAB]

		restore
	}
}
 
 


**** Save results to dataset

* Turn each result row vector into variables irp1, irp2, ... (one per column)
clear
set obs 1
gen temp=1
foreach res in irp se sirp sse {
	svmat `res'
}

* One observation per regression; regnum is the column position in the vectors
reshape long irp se sirp sse, i(temp) j(regnum)
drop temp

* Layout of regnum: 1-8 mothers, 9-16 fathers; within each parent the first
* four entries are period 1 and the next four period 2; within each group of
* four the position gives the specification.
gen mother = inrange(regnum,1,8)       if inrange(regnum,1,16)	// mother IRP?
gen period = 1 + (mod(regnum-1,8)>=4)  if inrange(regnum,1,16)	// time period
gen specif = 1 + mod(regnum-1,4)       if inrange(regnum,1,16)	// specification (4 for each parent/period)

* The last label line carries no continuation marker, so the command ends
* there regardless of what follows it.
label define regnum_vals ///
	1  "IRP" ///
	2  "IRP education TSIV" ///
	3  "IRP educ. and empl. TSIV" ///
	4  "IRP educ. and empl. TSIV, cond." ///
	5  "IRP" ///
	6  "IRP education TSIV" ///
	7  "IRP educ. and empl. TSIV" ///
	8  "IRP educ. and empl. TSIV, cond." ///
	9  "IRP" ///
	10 "IRP education TSIV" ///
	11 "IRP educ. and empl. TSIV" ///
	12 "IRP educ. and empl. TSIV, cond." ///
	13 "IRP" ///
	14 "IRP education TSIV" ///
	15 "IRP educ. and empl. TSIV" ///
	16 "IRP educ. and empl. TSIV, cond."

label values regnum regnum_vals
order regnum mother period specif, first

qui compress
* Path is quoted so that an output folder containing spaces still works
save "${us_results}/US_irps_eduempIV.dta", replace


clear
log close

